library(magrittr)
library(tidyverse)
library(Seurat)
library(readxl)
library(cowplot)
library(colorblindr)
library(viridis)
library(progeny)
library(destiny)

theme_cowplot2 <- function(...) {
  theme_cowplot(font_size = 12, ...) %+replace%
    theme(strip.background = element_blank(),
          plot.background = element_blank())
}
theme_set(theme_cowplot2())
coi <- params$cell_type_super
cell_sort <- params$cell_sort
cell_type_major <- params$cell_type_major
louvain_resolution <- params$louvain_resolution
louvain_cluster <- params$louvain_cluster

1 Cluster markers

1.1 Major T.super markers for cell assign

### load all data ---------------------------------
source("_src/global_vars.R")

# seu_obj <- read_rds(paste0("/work/shah/isabl_data_lake/analyses/16/52/1652/celltypes/", coi, "_processed.rds"))
seu_obj <- read_rds(paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/outs_prepost/", coi, "_seurat_", louvain_resolution, ".rds"))

myfeatures <- c("umapharmony_1", "umapharmony_2", "sample", louvain_cluster, "doublet", "nCount_RNA", "nFeature_RNA", "percent.mt", "doublet_score")

plot_data_wrapper <- function(cluster_res) {
  cluster_res <- enquo(cluster_res)
  as_tibble(FetchData(seu_obj, myfeatures)) %>% 
    left_join(meta_tbl, by = "sample") %>% 
    rename(cluster = !!cluster_res) %>% 
    mutate(cluster = as.character(cluster),
           tumor_supersite = ordered(tumor_supersite, levels = names(clrs$tumor_supersite)))
}

plot_data <- plot_data_wrapper(louvain_cluster)

1.2 Subtype currated markers

helper_f <- function(x) ifelse(is.na(x), "", x)

markers_v6_super[[coi]] %>% 
  group_by(subtype) %>% 
  mutate(rank = row_number(gene)) %>% 
  spread(subtype, gene) %>% 
  mutate_all(.funs = helper_f) %>% 
  formattable::formattable()
rank CD4.T.dysfunctional CD4.T.naive CD4.T.reg CD8.T Cycling.T.NK MT.high.T.NK NK.CD56 NK.Cytotoxic
1 CD4 CCR7 CD4 CCL4 CDC20 IGKC CD63 ADGRG1
2 CD40LG CD4 FOXP3 CD8A CDK1 MALAT1 CD7 CX3CR1
3 CTLA4 IL7R IL2RA CD8B MKI67 MIAT FCER1G FCER1G
4 CXCL13 KLF2 TNFRSF4 CRTAM PTTG1 MT-ND6 GNLY FCGR3A
5 FKBP5 TCF7 TRAC GZMA TOP2A MTRNR2L12 KLRC1 FGFBP2
6 IL6ST GZMB XIST KLRD1 GNLY
7 ITM2A GZMK KLRF1 GZMH
8 MAF GZMM KRT81 IGFBP7
9 NMB IFNG KRT86 KLRD1
10 NR3C1 LAG3 NCAM1 KLRF1
11 PDCD1 MT1E NKG7 NKG7
12 TNFRSF4 MT1X TYROBP PLAC8
13 TOX2 MT2A XCL1 PLEK
14 TSHZ2 PTMS XCL2 PTGDS
15 TRGC2 SPON2
16 TYROBP

1.3 Subtype cluster markers

# marker_tbl <- read_tsv(paste0("/work/shah/isabl_data_lake/analyses/16/52/1652/celltypes/", coi, "_markers.tsv")) %>% 
#   filter(resolution == louvain_resolution)
marker_tbl <- read_tsv(paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/outs_prepost/", coi, "_markers_", louvain_resolution, ".tsv"))

## Hypergeometric test --------------------------------------

test_set <- marker_tbl %>% 
  group_by(cluster) %>% 
  filter(!str_detect(gene, "^RPS|^RPL")) %>% 
  slice(1:50) %>% 
  mutate(k = length(cluster)) %>% 
  ungroup %>%
  select(cluster, gene, k) %>% 
  mutate(join_helper = 1) %>% 
  group_by(cluster, join_helper, k) %>%
  nest(test_set = gene)

markers_doub_tbl <- markers_v6 %>% 
  enframe("subtype", "gene") %>% 
  filter(!(subtype %in% unique(c(coi, cell_type_major)))) %>% 
  unnest(gene) %>% 
  group_by(gene) %>% 
  filter(length(gene) == 1) %>% 
  mutate(subtype = paste0("doublet.", subtype)) %>% 
  bind_rows(tibble(subtype = "Mito.high", gene = grep("^MT-", rownames(seu_obj), value = T)))

ref_set <- markers_v6_super[[coi]] %>% 
  bind_rows(markers_doub_tbl) %>% 
  group_by(subtype) %>% 
  mutate(m = length(gene),
         n = length(rownames(seu_obj))-m,
         join_helper = 1) %>% 
  group_by(subtype, m, n, join_helper) %>%
  nest(ref_set = gene)

hyper_tbl <- test_set %>% 
  left_join(ref_set, by = "join_helper") %>% 
  group_by(cluster, subtype, m, n, k) %>%
  do(q = length(intersect(unlist(.$ref_set), unlist(.$test_set)))) %>%
  mutate(pval = 1-phyper(q = q, m = m, n = n, k = k)) %>%
  ungroup %>%
  mutate(qval = p.adjust(pval, "BH"),
         sig = qval < 0.01)

# hyper_tbl %>% 
#   group_by(subtype) %>% 
#   filter(any(qval < 0.01)) %>%
#   ggplot(aes(subtype, -log10(qval), fill = sig)) +
#   geom_bar(stat = "identity") +
#   facet_wrap(~cluster) +
#   coord_flip()
  
low_rank <- str_detect(unique(hyper_tbl$subtype), "doublet")
subtype_lvl <- c(sort(unique(hyper_tbl$subtype)[!low_rank]), sort(unique(hyper_tbl$subtype)[low_rank]))
  
cluster_label_tbl <- hyper_tbl %>% 
  mutate(subtype = ordered(subtype, levels = subtype_lvl)) %>% 
  arrange(qval, subtype) %>%
  group_by(cluster) %>% 
  slice(1) %>% 
  mutate(subtype = ifelse(sig, as.character(subtype), paste0("unknown_", cluster))) %>% 
  select(cluster, cluster_label = subtype) %>% 
  ungroup %>% 
  mutate(cluster_label = make.unique(cluster_label, sep = "_"))

seu_obj$cluster_label <- unname(deframe(cluster_label_tbl)[as.character(unlist(seu_obj[[paste0("RNA_snn_res.", louvain_resolution)]]))])
plot_data$cluster_label <- seu_obj$cluster_label

marker_sheet <- marker_tbl %>% 
  left_join(cluster_label_tbl, by = "cluster") %>% 
  group_by(cluster_label) %>% 
  filter(!str_detect(gene, "^RPS|^RPL")) %>% 
  slice(1:50) %>% 
  mutate(rank = row_number(-avg_logFC)) %>% 
  select(cluster_label, gene, rank) %>% 
  spread(cluster_label, gene) %>% 
  mutate_all(.funs = helper_f)

formattable::formattable(marker_sheet)
rank CD4.T.dysfunctional CD4.T.naive CD4.T.reg CD8.T CD8.T_1 Cycling.T.NK doublet.B.cell doublet.Fibroblast doublet.Monocyte MT.high.T.NK NK.CD56 NK.Cytotoxic unknown_12
1 CXCL13 IL7R TNFRSF4 CD8A APOE STMN1 IGHM DCN CST3 IGKC KLRC1 FGFBP2 LST1
2 NMB CCR7 FOXP3 CD8B HLA-DRB5 MKI67 MS4A1 IGFBP5 S100A8 MALAT1 TYROBP FCGR3A AREG
3 MAF CD40LG IL2RA GZMK ATP5F1E TOP2A MEF2C RBP1 S100A9 MIAT GNLY SPON2 IL4I1
4 TSHZ2 MAL RTKN2 CCL4L2 CCL3L1 CENPF CD79A C7 CXCL8 IGLC3 FCER1G KLRF1 PCDH9
5 IL6ST TCF7 BATF CRTAM GZMK ASPM BANK1 CALD1 SPP1 NEAT1 AREG CX3CR1 SCN1B
6 CD40LG GPR183 CTLA4 TRGC2 SH2D1A TYMS BCL11A MEG3 HLA-DRA IGLC2 TRDC PRF1 SPINK2
7 CPM EEF1B2 TBC1D4 GZMH C1QB TUBA1B TNFRSF13C SOX4 LYZ NABP1 KRT81 PLAC8 TNFSF13B
8 ZBED2 LEF1 GADD45A LAG3 SPP1 NUSAP1 MARCH1 RARRES2 APOE SPOCK2 XCL1 PTGDS KIT
9 IGFL2 TPT1 LTB IFNG PRMT9 TUBB VPREB3 IGFBP4 C1QB IL7R IGFBP2 PLEK IL1R1
10 PDCD1 SELL TNFRSF18 CCL5 HIST1H4C PCLAF RALGPS2 NR2F2 AIF1 CD84 XCL2 GNLY LTC4S
11 CD4 EEF1A1 PMAIP1 CCL4 CD8B HMGB2 IGHD ADIRF MARCKS CD2 KRT86 KLRD1 TOX2
12 ICA1 AQP3 IKZF2 ITM2C APOC1 HIST1H4C TCF4 SELENOP C1QA CLEC2D KLRD1 ADGRG1 CSF2
13 TOX2 TRABD2A SOX4 DTHD1 IL32 HIST1H1B BASP1 MDK FN1 CD3G CEBPD CLIC3 KRT81
14 CTLA4 LTB SAT1 MT1X GPR183 TPX2 ARHGAP24 MGP FTL B2M CLIC3 IGFBP7 AFF3
15 TNFRSF4 ANK3 TIGIT JAML SLC2A3 UBE2C CD83 TAGLN APOC1 CD3D TXK NKG7 SCX
16 ITM2A CTSL ICOS CCL3L1 KLRG1 CLSPN CD22 STAR G0S2 HLA-A TMIGD2 GZMB PLAT
17 CD200 TNFRSF25 TNFRSF9 TNIP3 CKAP2 PCNA SWAP70 ADAMTS1 MNDA CD74 MATK PRSS23 IL23R
18 PTPN13 PASK LAYN CXCR6 CD3G HMGN2 ADAM28 LUM BASP1 CYBA IL2RB AKR1C3 TLE1
19 CCDC50 DPP4 TNFRSF1B TNFSF9 CD8A SMC4 AFF3 SFRP4 MS4A6A HLA-B KLRB1 EFHD2 SVIL
20 CHN1 ITGA6 F5 THEMIS CCL4L2 KNL1 CYBB C1R IL1B HSPH1 KLRC2 S1PR5 CD300LF
21 BHLHE40-AS1 CMTM8 IL1R1 HLA-DPB1 BCL2A1 SMC2 TCL1A PEG3 CD14 HLA-C NCAM1 TYROBP RORC
22 CH25H MYC IL32 MT1E HMGA1 H2AFZ CD24 SPARCL1 C1QC MAT2A CTSW FCER1G TNFSF11
23 SMCO4 TSHZ2 CD4 HLA-DRB1 CD2 CKS1B HVCN1 FHL2 LST1 FOSB HOPX GZMH LIF
24 BTLA TOB1 MIR4435-2HG GZMA PTGER4 BIRC5 BLK C1S CSF3R RCSD1 LAT2 CST7 TGM2
25 DUSP4 RACK1 IL1R2 KLRG1 PECAM1 CENPE LY86 AKAP12 FTH1 SPTBN1 CXXC5 HOPX ZG16B
26 GNG4 CCR6 ENTPD1 HLA-DPA1 GIMAP7 RRM2 FCRL5 NR2F1 GRN CXCR3 CD7 MYOM2 ENPP1
27 SESN3 CYSLTR1 ICA1 DUSP2 GZMA DLGAP5 FCRL1 SERPINF1 CYBB ERO1A SH2D1B MYBL1 IRAK3
28 CYSLTR1 FXYD1 BIRC3 MT2A CST7 DUT SCIMP CLU NPC2 TXK SRGAP3 CHST2 PTGER3
29 BATF LPAR6 IFI27 COTL1 ATAD2 PAX5 EGR1 SPI1 GRAP2 CCL3 CEP78 FOXC1
30 MIR155HG CHD7 CD27 APOBEC3G CDK1 BLNK WFDC2 RNASE1 RUNX1 TNFRSF18 CCL3 CA2
31 RILPL2 UBA52 GLRX CST7 HELLS SPIB SERPING1 FCGR2A TCF7 CD63 CD247 KIAA1211L
32 TNFRSF25 JUNB TYMP FABP5 GTSE1 PKIG PTCRA SERPINA1 ITGB7 GSTP1 MTSS1 COL4A4
33 ETV7 TIMP1 SLAMF1 PDCD1 CDKN3 CD19 CDKN1C PLAUR POU2F2 NKG7 CD300A SLC4A10
34 TNFRSF18 SESN3 BTG3 CD3D UBE2S GNG7 TCEAL9 GLUL VSIR KLRF1 FGR NRP1
35 CTSL NACA DUSP4 MT1F ZWINT COBLL1 TPM2 MS4A7 HAVCR2 MCTP2 PTGDR PDZK1
36 MYO7A CD4 PIM2 CD3G CKS2 FAM30A DLK1 CD83 LAT IFITM3 GSAP APOL4
37 CD84 RASGRP2 LAIR2 ZNF683 PTTG1 CD40 IFITM3 MAFB SRGN ITGAX SH2D1B B3GALT5
38 ENTPD1 FBLN5 MAF CCR5 KIF11 WDFY4 COL6A1 IER3 MYO1G KIR2DL4 ADRB2 BCAS1
39 TP53INP1 SATB1 MAST4 EOMES H2AFX FCRL2 COL1A2 CD68 PPT1 ZNF683 CTSW PRAM1
40 ADAM19 S1PR1 HPGD GZMM TK1 BTK FILIP1L CPVL ZEB1 MAFF FCRL6 NECTIN2
41 ICOS CAMK4 S100A4 CD27 HMMR TLR10 NUPR1 ALDH2 TIMP1 SLC16A3 XBP1 LINGO4
42 RGS1 RIPOR2 SPOCK2 OASL MCM7 GAPT SLC40A1 CSF2RA ICAM2 ATP8B4 TXK HOXA5
43 SRGN ANXA1 PHLDA1 DNAJB1 TMPO POU2AF1 MARCKSL1 FGL2 GPR174 B3GNT7 LYN PRKG1
44 CD82 MGAT4A LAPTM4B PECAM1 TUBB4B EBF1 TSC22D1 OLR1 NAMPT ADGRG3 TGFBR3 PDZD2
45 TOX TMEM204 HERC5 MIR155HG BRCA1 FCRLA CST3 GPNMB KLRG1 GZMB CD160 SLCO2A1
46 SPOCK2 LTA SIT1 EZH2 CPNE5 RARRES1 FPR1 KDM6B ITGA1 KLRB1 PKDCC
47 PRDM1 ETV7 GPR174 MCM4 OSBPL10 GNG11 SLC11A1 TMEM156 ZBTB16 AREG DACH1
48 DGKH POU2F2 GRAP2 MAD2L1 FAM129C CFH TGFBI SESN3 CLNK GPR65 BICC1
49 BIRC3 NCF4 IKZF3 CENPM HLA-DOB PDGFRA TNFAIP2 FKBP11 ADAM28 ITGAM EMID1
50 TRAT1 ID3 ITGA1 CEP55 SSPN FBLN1 CCDC88A GALM CD160 SAMD3 CD33
write_tsv(marker_sheet, paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_marker_sheet.tsv"))

2 Clusters

2.1 sizes

enframe(sort(table(seu_obj$cluster_label))) %>% 
  mutate(name = ordered(name, levels = rev(name))) %>% 
  ggplot() +
  geom_bar(aes(name, value), stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(y = c("#cells"), x = "cluster")

2.2 UMAP

alpha_lvl <- ifelse(nrow(plot_data) < 20000, 0.2, 0.1)
pt_size <- ifelse(nrow(plot_data) < 20000, 0.2, 0.05)

common_layers_disc <- list(  
  geom_point(size = pt_size, alpha = alpha_lvl),
  NoAxes(),
  guides(color = guide_legend(override.aes = list(size = 2, alpha = 1))),
  labs(color = "")
)

common_layers_cont <- list(  
  geom_point(size = pt_size, alpha = alpha_lvl),
  NoAxes(),
  scale_color_gradientn(colors = viridis(9)),
  guides(color = guide_colorbar())
)

ggplot(plot_data, aes(umapharmony_1, umapharmony_2, color = cluster_label)) + 
  common_layers_disc +
  #facet_wrap(~therapy) +
  ggtitle("Sub cluster")

3 Filtering out doublet and mito high clusters

3.1 UMAP

my_subtypes <- names(clrs$cluster_label[[coi]])

my_subtypes <- c(my_subtypes, unlist(lapply(paste0("_", 1:3), function(x) paste0(my_subtypes, x)))) %>% .[!str_detect(., "doublet")]

cells_to_keep <- colnames(seu_obj)[seu_obj$cluster_label %in% my_subtypes]
# seu_obj_sub <- subset(seu_obj, cells = cells_to_keep)
# seu_obj_sub <- RunUMAP(seu_obj_sub, dims = 1:50, reduction = "harmony", reduction.name = "umapharmony", reduction.key = "umapharmony_")
# seu_obj_sub$cluster_label <- seu_obj$cluster_label[colnames(seu_obj) %in% colnames(seu_obj_sub)]
# write_rds(seu_obj_sub, paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_processed_filtered.rds"))
seu_obj_sub <- read_rds(paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_processed_filtered.rds"))

plot_data_sub <- as_tibble(FetchData(seu_obj_sub, c(myfeatures, "cluster_label"))) %>% 
  left_join(meta_tbl, by = "sample") %>% 
  mutate(patient_id = str_remove_all(patient_id, "SPECTRUM-OV-"),
         tumor_supersite = ordered(tumor_supersite, levels = names(clrs$tumor_supersite))) %>% 
  mutate(cell_id = colnames(seu_obj_sub),
         cluster_label = ordered(cluster_label, levels = my_subtypes),
         )
  
if (cell_sort == "CD45+") {
plot_data_sub <- filter(plot_data_sub, sort_parameters != "singlet, live, CD45-", !is.na(tumor_supersite))
}

if (cell_sort == "CD45-") {
plot_data_sub <- filter(plot_data_sub, sort_parameters != "singlet, live, CD45+", !is.na(tumor_supersite))
}

ggplot(plot_data_sub, aes(umapharmony_1, umapharmony_2, color = cluster_label)) + 
  common_layers_disc +
  ggtitle("Sub cluster") +
  #facet_wrap(~cluster_label) +
  scale_color_manual(values = clrs$cluster_label[[coi]])

ggplot(plot_data_sub, aes(umapharmony_1, umapharmony_2, color = patient_id_short)) + 
  common_layers_disc +
  ggtitle("Patient") +
  # facet_wrap(~therapy) +
  scale_color_manual(values = clrs$patient_id_short)

ggplot(plot_data_sub, aes(umapharmony_1, umapharmony_2, color = tumor_supersite)) + 
  # geom_point(aes(umapharmony_1, umapharmony_2), 
  #            color = "grey90", size = 0.01, 
  #            data = select(plot_data_sub, -tumor_supersite)) +
  common_layers_disc +
  ggtitle("Site") +
  # facet_wrap(~therapy) +
  scale_color_manual(values = clrs$tumor_supersite)

write_tsv(select(plot_data_sub, cell_id, everything(), -umapharmony_1, -umapharmony_2, -contains("RNA_")), paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_embedding.tsv"))

3.2 add module scores and pathway scores

signature_modules <- read_excel("_data/small/signatures/SPECTRUM v5 sub cluster markers.xlsx", sheet = 2, skip = 1, range = "M2:P100") %>% 
  gather(module, gene) %>% 
  na.omit() %>% 
  group_by(module) %>% 
  do(gene = c(.$gene)) %>% 
  {setNames(.$gene, .$module)}

signature_modules$ISG.module <- c("CCL5", "CXCL10", "IFNA1", "IFNB1", "ISG15", "IFI27L2", "SAMD9L")

## compute expression module scores
for (i in 1:length(signature_modules)) {
  seu_obj_sub <- AddModuleScore(seu_obj_sub, features = signature_modules[i], name = names(signature_modules)[i])
  seu_obj_sub[[names(signature_modules)[i]]] <- seu_obj_sub[[paste0(names(signature_modules)[i], "1")]]
  seu_obj_sub[[paste0(names(signature_modules)[i], "1")]] <- NULL
  print(paste(names(signature_modules)[i], "DONE"))
}
## [1] "CD8.Cytotoxic DONE"
## [1] "CD8.Dysfunctional DONE"
## [1] "CD8.Naive DONE"
## [1] "CD8.Predysfunctional DONE"
## [1] "ISG.module DONE"
## compute progeny scores
progeny_list <- seu_obj_sub@assays$RNA@data[VariableFeatures(seu_obj_sub),] %>% 
  as.matrix %>% 
  progeny %>% 
  as.data.frame %>% 
  as.list

names(progeny_list) <- make.names(paste0(names(progeny_list), ".pathway"))

for (i in 1:length(progeny_list)) {
  seu_obj_sub <- AddMetaData(seu_obj_sub, metadata = progeny_list[[i]], 
                             col.name = names(progeny_list)[i])
}

write_rds(seu_obj_sub, paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_processed_filtered_annotated.rds"))

3.3 marker heatmap

marker_top_tbl <- marker_sheet[,-1] %>% 
  slice(1:10) %>% 
  as.list %>% 
  .[!str_detect(names(.), "doublet")] %>% 
  enframe("cluster_label_x", "gene") %>% 
  unnest(gene)

plot_data_markers <- as_tibble(FetchData(seu_obj_sub, c("cluster_label", myfeatures, unique(marker_top_tbl$gene)))) %>% 
  gather(gene, value, -c(1:(length(myfeatures)+1))) %>% 
  left_join(meta_tbl, by = "sample") %>% 
  mutate(patient_id = str_remove_all(patient_id, "SPECTRUM-OV-"),
         tumor_supersite = ordered(tumor_supersite, levels = names(clrs$tumor_supersite))) %>% 
  mutate(cluster_label = ordered(cluster_label, levels = my_subtypes)) %>% 
  group_by(cluster_label, gene) %>% 
  summarise(value = mean(value, na.rm = T)) %>% 
  group_by(gene) %>% 
  mutate(value = scales::rescale(value)) %>% 
  left_join(marker_top_tbl, by = "gene") %>% 
  mutate(cluster_label_x = ordered(cluster_label_x, levels = rev(names(clrs$cluster_label[[coi]]))))

ggplot(plot_data_markers) +
  geom_tile(aes(gene, cluster_label, fill = value)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  facet_grid(~cluster_label_x, scales = "free", space = "free") +
  scale_fill_gradientn(colors = viridis(9)) +
  labs(fill = "Scaled\nexpression") +
  theme(aspect.ratio = 1,
        axis.line = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank())

# ggsave(paste0("_fig/002_marker_heatmap_", coi, ".pdf"), width = nrow(marker_top_tbl)/6, height = 5)

3.4 composition

3.4.1 per site

comp_site_tbl <- plot_data_sub %>%
  filter(!is.na(tumor_supersite)) %>% 
  group_by(cluster_label, tumor_supersite) %>%
  tally %>%
  group_by(tumor_supersite) %>%
  mutate(nrel = n/sum(n)*100) %>%
  ungroup

pnrel_site <- ggplot(comp_site_tbl) +
  geom_bar(aes(tumor_supersite, nrel, fill = cluster_label),
           stat = "identity", position = position_stack()) +
  scale_y_continuous(expand = c(0, 0)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(fill = "Cluster", y = "Fraction [%]", x = "") +
  scale_fill_manual(values = clrs$cluster_label[[coi]])

pnabs_site <- ggplot(comp_site_tbl) +
  geom_bar(aes(tumor_supersite, n, fill = cluster_label),
           stat = "identity", position = position_stack()) +
  scale_y_continuous(expand = c(0, 0)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(fill = "Cluster", y = "# cells", x = "") +
  scale_fill_manual(values = clrs$cluster_label[[coi]])

plot_grid(pnabs_site, pnrel_site, ncol = 2, align = "h")

# ggsave(paste0("_fig/02_deep_dive_", coi, "_comp_site.pdf"), width = 8, height = 4)

3.4.2 per sample

comp_tbl_sample_sort <- plot_data_sub %>% 
  group_by(tumor_subsite, tumor_supersite, patient_id, consensus_signature, therapy, cluster_label) %>% 
  tally %>% 
  group_by(tumor_subsite, tumor_supersite, patient_id, consensus_signature, therapy) %>% 
  mutate(nrel = n/sum(n)*100,
         nsum = sum(n),
         log10n = log10(n),
         label_supersite = "Site",
         label_mutsig = "Signature",
         label_therapy = "Rx") %>% 
  ungroup %>% 
  arrange(desc(therapy), tumor_supersite) %>% 
  mutate(tumor_subsite_rx = paste0(tumor_subsite, "_", therapy)) %>% 
  mutate(tumor_subsite = ordered(tumor_subsite, levels = unique(tumor_subsite)),
         tumor_subsite_rx = ordered(tumor_subsite_rx, levels = unique(tumor_subsite_rx))) %>% 
  arrange(patient_id) %>% 
  mutate(label_patient_id = ifelse(as.logical(as.numeric(fct_inorder(as.character(patient_id)))%%2), "Patient1", "Patient2"))

sample_id_x_tbl <- plot_data_sub %>% 
  mutate(sort_short_x = cell_sort) %>% 
  distinct(patient_id, sort_short_x, tumor_subsite, therapy, sample) %>% 
  unite(sample_id_x, patient_id, sort_short_x, tumor_subsite, therapy) %>% 
  arrange(sample_id_x)

comp_tbl_sample_sort %>% 
  mutate(sort_short_x = cell_sort) %>% 
  unite(sample_id_x, patient_id, sort_short_x, tumor_subsite_rx) %>% 
  select(sample_id_x, cluster_label, n, nrel, nsum) %>% 
  left_join(sample_id_x_tbl, by = "sample_id_x") %>% 
  write_tsv(paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_subtype_compositions.tsv"))
ybreaks <- c(500, 1000, 2000, 4000, 6000, 8000, 10000, 15000, 20000)

max_cells_per_sample <- max(comp_tbl_sample_sort$nsum)
ymaxn <- ybreaks[max_cells_per_sample < ybreaks][1]

comp_plot_wrapper <- function(y = "nrel", switch = NULL) {
  if (y == "nrel") ylab <- paste0("Fraction\nof cells [%]")
  if (y == "n") ylab <- paste0("Number\nof cells")
  p <- ggplot(comp_tbl_sample_sort, 
              aes_string("tumor_subsite_rx", y, fill = "cluster_label")) + 
    facet_grid(~patient_id, space = "free", scales = "free", switch = switch) +
    coord_cartesian(clip = "off") + 
    scale_fill_manual(values = clrs$cluster_label[[coi]]) + 
    theme(axis.text.x = element_blank(),
          axis.title.y = element_text(angle = 0, vjust = 0.5, hjust = 0.5, 
                                      margin = margin(0, -0.4, 0, 0, unit = "npc")),
          axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
          axis.line.x = element_blank(),
          strip.text.y = element_blank(),
          strip.text.x = element_blank(),
          strip.background.y = element_blank(),
          strip.background.x = element_blank(),
          plot.margin = margin(0, 0, 0, 0, "npc")) + 
    labs(x = "",
         y = ylab) +
    guides(fill = FALSE)
  if (y == "nrel") p <- p + 
    geom_bar(stat = "identity") +
    scale_y_continuous(expand = c(0, 0), 
                       breaks = c(0, 50, 100), 
                       labels = c("0", "50", "100"))
  if (y == "n") p <- p + 
    geom_bar(stat = "identity", position = position_stack()) +
    scale_y_continuous(expand = c(0, 0), 
                       breaks = c(0, ymaxn/2, ymaxn),
                       limits = c(0, ymaxn),
                       labels = c("", ymaxn/2, ymaxn)) +
    expand_limits(y = c(0, ymaxn)) +
    theme(panel.grid.major.y = element_line(linetype = 1, color = "grey90", size = 0.5))
  return(p)
} 

common_label_layers <- list(
  geom_tile(color = "white", size = 0.15),
  theme(axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        axis.title.x = element_blank(),
        axis.line.x = element_blank(),
        strip.text = element_blank(),
        strip.background = element_blank(),
        plot.margin = margin(0, 0, 0, 0, "npc")),
  scale_y_discrete(expand = c(0, 0)),
  labs(y = ""),
  guides(fill = FALSE),
  facet_grid(~patient_id, 
             space = "free", scales = "free")
)

comp_label_site <- ggplot(distinct(comp_tbl_sample_sort, tumor_subsite_rx, therapy, tumor_supersite, label_supersite, patient_id), 
       aes(tumor_subsite_rx, label_supersite, 
           fill = tumor_supersite)) + 
  scale_fill_manual(values = clrs$tumor_supersite) +
  common_label_layers

comp_label_rx <- ggplot(distinct(comp_tbl_sample_sort, tumor_subsite_rx, therapy, tumor_supersite, label_therapy, consensus_signature, patient_id), 
       aes(tumor_subsite_rx, label_therapy, 
           fill = therapy)) + 
  scale_fill_manual(values = c(`post-Rx` = "gold3", `pre-Rx` = "steelblue")) +
  common_label_layers

comp_label_mutsig <- ggplot(distinct(comp_tbl_sample_sort, tumor_subsite_rx, therapy, tumor_supersite, label_mutsig, consensus_signature, patient_id), 
       aes(tumor_subsite_rx, label_mutsig, 
           fill = consensus_signature)) + 
  scale_fill_manual(values = clrs$consensus_signature) +
  common_label_layers

patient_label_tbl <- distinct(comp_tbl_sample_sort, patient_id, .keep_all = T)

comp_label_patient_id <- ggplot(patient_label_tbl, aes(tumor_subsite_rx, label_patient_id)) + 
  scale_fill_manual(values = clrs$consensus_signature) +
  geom_text(aes(tumor_subsite_rx, label_patient_id, label = patient_id)) +
  facet_grid(~patient_id, 
             space = "free", scales = "free") +
  coord_cartesian(clip = "off") + 
  theme_void() +
  theme(strip.text = element_blank(),
        strip.background = element_blank(),
        plot.margin = margin(0, 0, 0, 0, "npc"))

hist_plot_wrapper <- function(x = "nrel") {
  if (x == "nrel") {
    xlab <- paste0("Fraction of cells [%]")
    bw <- 5
  }
  if (x == "log10n") {
    xlab <- paste0("Number of cells")
    bw <- 0.2
  }
  p <- ggplot(comp_tbl_sample_sort) +
    ggridges::geom_density_ridges(
      aes_string(x, "cluster_label", fill = "cluster_label"), color = "black",
      stat = "binline", binwidth = bw, scale = 3) +
    facet_grid(label_supersite~., 
               space = "free", scales = "free") +
    scale_fill_manual(values = clrs$cluster_label[[coi]]) + 
    theme(axis.text.y = element_blank(),
          axis.ticks.y = element_blank(),
          axis.title.y = element_blank(),
          axis.line.y = element_blank(),
          strip.text = element_blank(),
          strip.background = element_blank(),
          plot.margin = margin(0, 0, 0, 0, "npc")) +
    scale_x_continuous(expand = c(0, 0)) +
    scale_y_discrete(expand = c(0, 0)) +
    guides(fill = FALSE) +
    labs(x = xlab)
  if (x == "log10n") p <- p + expand_limits(x = c(0, 3)) + 
    scale_x_continuous(expand = c(0, 0), 
                       labels = function(x) parse(text = paste("10^", x)))
  return(p)
}

pcomp1 <- comp_plot_wrapper("n")
pcomp2 <- comp_plot_wrapper("nrel")

pcomp_grid <- plot_grid(comp_label_patient_id, 
                        pcomp1, pcomp2, 
                        comp_label_site, comp_label_rx, comp_label_mutsig,
                        ncol = 1, align = "v", 
                        rel_heights = c(0.15, 0.33, 0.33, 0.06, 0.06, 0.06))

phist1 <- hist_plot_wrapper("log10n")

pcomp_hist_grid <- ggdraw() +
  draw_plot(pcomp_grid, x = 0.01, y = 0, width = 0.85, height = 1) +
  draw_plot(phist1, x = 0.87, y = 0.05, width = 0.12, height = 0.8)

pcomp_hist_grid

# ggsave(paste0("_fig/02_composition_v6_",coi,".pdf"), pcomp_hist_grid, width = 10, height = 2)

3.4.3 site specific cluster enrichment

comp_tbl_z <- comp_tbl_sample_sort %>% 
  filter(therapy == "pre-Rx",
         !(tumor_supersite %in% c("Ascites", "Other"))) %>% 
  group_by(patient_id, cluster_label) %>% 
  arrange(patient_id, cluster_label, nrel) %>% 
  mutate(rank = row_number(nrel),
         z_rank = scales::rescale(rank)) %>% 
  mutate(mean_nrel = mean(nrel, na.rm = T),
         sd_nrel = sd(nrel, na.rm = T),
         z_nrel = (nrel - mean_nrel) / sd_nrel) %>% 
  ungroup()

ggplot(comp_tbl_z) +
  geom_boxplot(aes(tumor_supersite, z_nrel, color = tumor_supersite), 
               outlier.shape = NA) +
  geom_point(aes(tumor_supersite, z_nrel, color = tumor_supersite), position = position_jitter(), size = 0.1) +
  facet_grid(~cluster_label, scales = "free_x", space = "free_x") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        aspect.ratio = 5) +
  scale_color_manual(values = clrs$tumor_supersite)

ggplot(comp_tbl_z) +
  geom_boxplot(aes(tumor_supersite, z_rank, color = tumor_supersite), 
               outlier.shape = NA) +
  geom_point(aes(tumor_supersite, z_rank, color = tumor_supersite), position = position_jitter(), size = 0.1) +
  facet_grid(~cluster_label, scales = "free_x", space = "free_x") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        aspect.ratio = 5) +
  scale_color_manual(values = clrs$tumor_supersite)

3.5 sub cluster CD8 cells

# seu_obj_cd8 <- seu_obj_sub %>%
#   subset(subset = cluster_label == "CD8.T") %>%
#   FindNeighbors(reduction = "harmony", dims = 1:50) %>%
#   FindClusters(resolution = 0.2) %>%
#   RunUMAP(dims = 1:50, reduction = "harmony", reduction.name = "umapharmony", reduction.key = "umapharmony_")
# 
# write_rds(seu_obj_cd8, "/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/CD8.T_processed.rds")
seu_obj_cd8 <- read_rds("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/CD8.T_processed.rds")

# marker_tbl <- FindAllMarkers(seu_obj_cd8, only.pos = T)
# write_tsv(as_tibble(marker_tbl), "/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/CD8.T_markers.tsv")
marker_tbl <- read_tsv("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/CD8.T_markers.tsv")

## Hypergeometric test --------------------------------------

test_set <- marker_tbl %>% 
  group_by(cluster) %>% 
  filter(!str_detect(gene, "^RPS|^RPL")) %>% 
  slice(1:50) %>% 
  mutate(k = length(cluster)) %>% 
  ungroup %>%
  select(cluster, gene, k) %>% 
  mutate(join_helper = 1) %>% 
  group_by(cluster, join_helper, k) %>%
  nest(test_set = gene)

markers_doub_tbl <- markers_v6 %>% 
  enframe("subtype", "gene") %>% 
  filter(!(subtype %in% unique(c(coi, cell_type_major)))) %>% 
  unnest(gene) %>% 
  group_by(gene) %>% 
  filter(length(gene) == 1) %>% 
  mutate(subtype = paste0("doublet.", subtype)) %>% 
  bind_rows(tibble(subtype = "Mito.high", gene = grep("^MT-", rownames(seu_obj), value = T)))

ref_set <- markers_v6_super[["CD8.T"]] %>% 
  bind_rows(markers_doub_tbl) %>% 
  group_by(subtype) %>% 
  mutate(m = length(gene),
         n = length(rownames(seu_obj))-m,
         join_helper = 1) %>% 
  group_by(subtype, m, n, join_helper) %>%
  nest(ref_set = gene)

hyper_tbl <- test_set %>% 
  left_join(ref_set, by = "join_helper") %>% 
  group_by(cluster, subtype, m, n, k) %>%
  do(q = length(intersect(unlist(.$ref_set), unlist(.$test_set)))) %>%
  mutate(pval = 1-phyper(q = q, m = m, n = n, k = k)) %>%
  ungroup %>%
  mutate(qval = p.adjust(pval, "BH"),
         sig = qval < 0.01)

# hyper_tbl %>% 
#   group_by(subtype) %>% 
#   filter(any(qval < 0.01)) %>%
#   ggplot(aes(subtype, -log10(qval), fill = sig)) +
#   geom_bar(stat = "identity") +
#   facet_wrap(~cluster) +
#   coord_flip()
  
low_rank <- str_detect(unique(hyper_tbl$subtype), "Mito|doublet")
subtype_lvl <- c(sort(unique(hyper_tbl$subtype)[!low_rank]), sort(unique(hyper_tbl$subtype)[low_rank]))
  
cluster_label_tbl <- hyper_tbl %>% 
  mutate(subtype = ordered(subtype, levels = subtype_lvl)) %>% 
  arrange(qval, subtype) %>%
  group_by(cluster) %>% 
  slice(1) %>% 
  mutate(subtype = ifelse(sig, as.character(subtype), paste0("unknown_", cluster))) %>% 
  select(cluster, cluster_label = subtype) %>% 
  ungroup %>% 
  mutate(cluster_label = make.unique(cluster_label, sep = "_"))

marker_sheet <- marker_tbl %>% 
  left_join(cluster_label_tbl, by = "cluster") %>% 
  group_by(cluster_label) %>% 
  filter(!str_detect(gene, "^RPS|^RPL")) %>% 
  slice(1:50) %>% 
  mutate(rank = row_number()) %>% 
  select(cluster_label, gene, rank) %>% 
  spread(cluster_label, gene) %>% 
  mutate_all(.funs = helper_f)

formattable::formattable(marker_sheet)
rank CD8.T.activated.CD40LG CD8.T.activated.HSP CD8.T.dysfunctional.CXCL13 CD8.T.dysfunctional.ISG CD8.T.naive doublet.Plasma.cell gd.T.cell
1 CD40LG HSPA1A CXCL13 IFIT3 IL7R IGHG1 TRDC
2 CD4 HSPA1B GZMB IFIT1 KLRG1 IGLC3 GNLY
3 IL7R MT1X CCL3 IFIT2 GZMK IGKC KLRC2
4 TNFRSF4 MT1E HAVCR2 ISG15 GPR183 IGLC2 XCL1
5 LEF1 HSPA6 TNFRSF9 MX1 TCF7 IGHG3 FCER1G
6 TNFRSF25 DNAJB1 KRT86 RSAD2 EEF1B2 IGHGP AREG
7 CTSL MT1G CTLA4 IFI6 SORL1 IGHG4 TYROBP
8 CCR7 HSPH1 PHLDA1 MX2 PLEK JCHAIN TRGC1
9 TIMP1 HSP90AA1 MIR155HG HERC5 CCR7 IGHM HOPX
10 RORA HSP90AB1 IFNG IFI44L MYBL1 IGHA1 IKZF2
11 SELL DNAJA4 CCL4L2 OAS1 RIPOR2 MZB1 NCR3
12 MAF DNAJB4 ENTPD1 USP18 NCF1 CCL3L1 TRGC2
13 LTB ZFAND2A SRGAP3 OAS3 PTGDR CD7
14 KLRB1 HSPB1 SPRY1 ISG20 PLCG2 IFITM3
15 NFKBIA DUSP1 TNFRSF18 CMPK2 TNFRSF18
16 PLCG2 SLC30A1 AKAP5 TNFSF10 XCL2
17 TNFAIP3 FOS VCAM1 EPSTI1 KIR3DL2
18 JUN CLNK IFI44 ZNF683
19 FKBP4 CD63 SAMD9L ITGAX
20 FOSB LAYN NT5C3A TXK
21 MT2A TIGIT IFI27 KLRD1
22 JUNB NDFIP2 CD38 CXXC5
23 NR4A1 MYO7A DDX58 LAT2
24 TSPYL2 FAM3C GBP1 KIR2DL3
25 RGS2 FABP5 IRF7 LEF1
26 MT1F PDCD1 IFIH1 CTSW
27 ERN1 HLA-DRA LY6E SPINT2
28 UBC ADGRG1 OASL SPRY2
29 ANXA1 ZNF683 DDX60L ITGAM
30 GADD45B CCND2 SAT1 CD63
31 IL7R TNFSF4 OAS2 FOS
32 SLC2A3 GAPDH LGALS9 LYN
33 PLIN2 SAMSN1 ZBP1 RRAS2
34 DDIT3 TNFRSF1B TYMP PRMT9
35 IER5 HLA-DQA1 MT2A CD9
36 LMNA LGALS3 GBP4 TRG-AS1
37 NFKBIA HLA-DRB1 HSH2D MATK
38 PFKFB3 CSF1 LAMP3 CD300A
39 TNF DGKH LAG3 ZNF331
40 NABP1 LAG3 RTP4 FCRL6
41 CREM ACP5 GPR155 CYSLTR1
42 IER5L DUSP4 ENDOD1 LTB
43 DDIT4 TNS3 IFITM3 IL2RB
44 ATF3 TBC1D4 ETV7 PTGDR
45 ZNF331 CCL3L1 EHD4 SRGAP3
46 HMGB2 LRRN3 SP140 PLAC8
47 AREG CHN1 CASP1 CCDC50
48 SLC5A3 ANKRD28 H3F3B RIN3
49 JAML ECE1 JUNB
50 CXCR6 GBP5 PTGER2
write_tsv(marker_sheet, paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/CD8.T.cell_marker_sheet.tsv"))
seu_obj_cd8$cluster_label <- unname(deframe(cluster_label_tbl)[as.character(unlist(seu_obj_cd8[[paste0("RNA_snn_res.", louvain_resolution)]]))])

seu_obj_sub_sub <- seu_obj_sub

cluster_label_tbl_1 <- as_tibble(cbind(cell_id=colnames(seu_obj_sub), FetchData(seu_obj_sub, c("cluster_label"))))
cluster_label_tbl_2 <- as_tibble(cbind(cell_id=colnames(seu_obj_cd8), FetchData(seu_obj_cd8, c("cluster_label"))))

cluster_label_tbl <- left_join(cluster_label_tbl_1, cluster_label_tbl_2, by = "cell_id") %>%
  mutate(cluster_label = ifelse(is.na(cluster_label.y), cluster_label.x, cluster_label.y))

seu_obj_sub_sub$cluster_label <- cluster_label_tbl$cluster_label

seu_obj_sub_sub <- subset(seu_obj_sub_sub, subset = cluster_label != "doublet.Plasma.cell")
write_rds(seu_obj_sub_sub, "/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/T.cell_processed_filtered_sub.rds")
root_cell <- "SPECTRUM-OV-036_S1_CD45P_PELVIC_PERITONEUM_CGGACACCAACGACTT"
seu_obj_cd8_sub <- subset(seu_obj_cd8, subset = cluster_label != "doublet.Plasma.cell" & cluster_label != "gd.T.cell")
seu_obj_cd8_sub <- subset(seu_obj_cd8_sub, cells = c(root_cell, colnames(seu_obj_cd8_sub)[colnames(seu_obj_cd8_sub)!=root_cell][-1]))
write_rds(seu_obj_sub_sub, "/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/T.cell_processed_filtered_sub.rds")
seu_obj_sub_sub <- read_rds("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/T.cell_processed_filtered_sub.rds")

dc_obj <- DiffusionMap(seu_obj_cd8_sub@reductions$harmony@cell.embeddings, k = 100)
dc_mat <- dc_obj@eigenvectors
colnames(dc_mat) <- paste0("DC_", 1:ncol(dc_mat))
seu_obj_cd8_sub[["DC"]] <- CreateDimReducObject(embeddings = dc_mat, key = "DC_", assay = DefaultAssay(seu_obj_cd8_sub))

write_rds(seu_obj_cd8_sub, "/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/CD8.T_processed_filtered.rds")

dpt_obj <- DPT(dc_obj)
write_rds(dpt_obj, "/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/CD8.T_sub_dpt.rds")
seu_obj_cd8_sub$DPT1 <- dpt_obj$DPT1

write_rds(seu_obj_cd8_sub, "/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/CD8.T_processed_filtered.rds")
seu_obj_cd8_sub <- read_rds("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/CD8.T_processed_filtered.rds")

3.5.1 UMAP

plot_data_sub_sub <- as_tibble(FetchData(seu_obj_sub_sub, c(myfeatures, "cluster_label"))) %>% 
  left_join(meta_tbl, by = "sample") %>% 
  mutate(patient_id = str_remove_all(patient_id, "SPECTRUM-OV-"),
         tumor_supersite = ordered(tumor_supersite, levels = names(clrs$tumor_supersite))) %>% 
  mutate(cell_id = colnames(seu_obj_sub_sub),
         cluster_label = ordered(cluster_label, levels = my_subtypes),
         )
  
if (cell_sort == "CD45+") {
plot_data_sub_sub <- filter(plot_data_sub_sub, sort_parameters != "singlet, live, CD45-", !is.na(tumor_supersite))
}

if (cell_sort == "CD45-") {
plot_data_sub_sub <- filter(plot_data_sub_sub, sort_parameters != "singlet, live, CD45+", !is.na(tumor_supersite))
}

ggplot(plot_data_sub_sub, aes(umapharmony_1, umapharmony_2, color = cluster_label)) + 
  common_layers_disc +
  ggtitle("Sub cluster") +
  #facet_wrap(~cluster_label) +
  scale_color_manual(values = clrs$cluster_label[[coi]])

ggplot(plot_data_sub_sub, aes(umapharmony_1, umapharmony_2, color = patient_id_short)) + 
  common_layers_disc +
  ggtitle("Patient") +
  # facet_wrap(~therapy) +
  scale_color_manual(values = clrs$patient_id_short)

ggplot(plot_data_sub_sub, aes(umapharmony_1, umapharmony_2, color = tumor_supersite)) + 
  # geom_point(aes(umapharmony_1, umapharmony_2), 
  #            color = "grey90", size = 0.01, 
  #            data = select(plot_data_sub_sub, -tumor_supersite)) +
  common_layers_disc +
  ggtitle("Site") +
  # facet_wrap(~therapy) +
  scale_color_manual(values = clrs$tumor_supersite)

write_tsv(select(plot_data_sub_sub, cell_id, everything(), -umapharmony_1, -umapharmony_2, -contains("RNA_")), paste0("/work/shah/uhlitzf/data/SPECTRUM/freeze/v6/", coi, "_embedding_sub.tsv"))

3.5.2 UMAP & DC

plot_data_cd8_sub <- as_tibble(FetchData(seu_obj_cd8_sub, c(myfeatures, "cluster_label", "DC_1", "DC_2"))) %>% 
  left_join(meta_tbl, by = "sample") %>% 
  mutate(patient_id = str_remove_all(patient_id, "SPECTRUM-OV-"),
         tumor_supersite = ordered(tumor_supersite, levels = names(clrs$tumor_supersite))) %>% 
  mutate(cell_id = colnames(seu_obj_cd8_sub),
         cluster_label = ordered(cluster_label, levels = my_subtypes),
         )
  
if (cell_sort == "CD45+") {
plot_data_cd8_sub <- filter(plot_data_cd8_sub, sort_parameters != "singlet, live, CD45-", !is.na(tumor_supersite))
}

if (cell_sort == "CD45-") {
plot_data_cd8_sub <- filter(plot_data_cd8_sub, sort_parameters != "singlet, live, CD45+", !is.na(tumor_supersite))
}

ggplot(plot_data_cd8_sub, aes(umapharmony_1, umapharmony_2, color = cluster_label)) + 
  common_layers_disc +
  ggtitle("Sub cluster") +
  #facet_wrap(~cluster_label) +
  scale_color_manual(values = clrs$cluster_label[[coi]])

ggplot(plot_data_cd8_sub, aes(umapharmony_1, umapharmony_2, color = patient_id_short)) + 
  common_layers_disc +
  ggtitle("Patient") +
  # facet_wrap(~therapy) +
  scale_color_manual(values = clrs$patient_id_short)

ggplot(plot_data_cd8_sub, aes(umapharmony_1, umapharmony_2, color = tumor_supersite)) + 
  common_layers_disc +
  ggtitle("Site") +
  # facet_wrap(~therapy) +
  scale_color_manual(values = clrs$tumor_supersite)

ggplot(plot_data_cd8_sub, aes(DC_1, DC_2, color = cluster_label)) + 
  common_layers_disc +
  ggtitle("Sub cluster") +
  #facet_wrap(~cluster_label) +
  scale_color_manual(values = clrs$cluster_label[[coi]])

ggplot(plot_data_cd8_sub, aes(DC_1, DC_2, color = patient_id_short)) + 
  common_layers_disc +
  ggtitle("Patient") +
  # facet_wrap(~therapy) +
  scale_color_manual(values = clrs$patient_id_short)

ggplot(plot_data_cd8_sub, aes(DC_1, DC_2, color = tumor_supersite)) + 
  common_layers_disc +
  ggtitle("Site") +
  # facet_wrap(~therapy) +
  scale_color_manual(values = clrs$tumor_supersite)

4 session info

devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value                       
##  version  R version 3.6.2 (2019-12-12)
##  os       Debian GNU/Linux 10 (buster)
##  system   x86_64, linux-gnu           
##  ui       X11                         
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       Etc/UTC                     
##  date     2020-12-22                  
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package              * version    date       lib
##  abind                  1.4-5      2016-07-21 [2]
##  ape                    5.3        2019-03-17 [2]
##  assertthat             0.2.1      2019-03-21 [2]
##  backports              1.1.10     2020-09-15 [1]
##  bibtex                 0.4.2.2    2020-01-02 [2]
##  Biobase                2.46.0     2019-10-29 [2]
##  BiocGenerics           0.32.0     2019-10-29 [2]
##  BiocParallel           1.20.1     2019-12-21 [2]
##  bitops                 1.0-6      2013-08-17 [2]
##  boot                   1.3-24     2019-12-20 [3]
##  broom                  0.7.2      2020-10-20 [1]
##  callr                  3.4.2      2020-02-12 [1]
##  car                    3.0-8      2020-05-21 [1]
##  carData                3.0-4      2020-05-22 [1]
##  caTools                1.17.1.4   2020-01-13 [2]
##  cellranger             1.1.0      2016-07-27 [2]
##  class                  7.3-15     2019-01-01 [3]
##  cli                    2.0.2      2020-02-28 [1]
##  cluster                2.1.0      2019-06-19 [3]
##  codetools              0.2-16     2018-12-24 [3]
##  colorblindr          * 0.1.0      2020-01-13 [2]
##  colorspace           * 1.4-2      2019-12-29 [2]
##  cowplot              * 1.0.0      2019-07-11 [2]
##  crayon                 1.3.4      2017-09-16 [1]
##  curl                   4.3        2019-12-02 [2]
##  data.table             1.12.8     2019-12-09 [2]
##  DBI                    1.1.0      2019-12-15 [2]
##  dbplyr                 2.0.0      2020-11-03 [1]
##  DelayedArray           0.12.2     2020-01-06 [2]
##  DEoptimR               1.0-8      2016-11-19 [1]
##  desc                   1.2.0      2018-05-01 [2]
##  destiny              * 3.0.1      2020-01-16 [1]
##  devtools               2.2.1      2019-09-24 [2]
##  digest                 0.6.25     2020-02-23 [1]
##  dplyr                * 1.0.2      2020-08-18 [1]
##  e1071                  1.7-3      2019-11-26 [1]
##  ellipsis               0.3.1      2020-05-15 [1]
##  evaluate               0.14       2019-05-28 [2]
##  fansi                  0.4.1      2020-01-08 [2]
##  farver                 2.0.3      2020-01-16 [1]
##  fitdistrplus           1.0-14     2019-01-23 [2]
##  forcats              * 0.5.0      2020-03-01 [1]
##  foreign                0.8-74     2019-12-26 [3]
##  formattable            0.2.0.1    2016-08-05 [1]
##  fs                     1.5.0      2020-07-31 [1]
##  future                 1.15.1     2019-11-25 [2]
##  future.apply           1.4.0      2020-01-07 [2]
##  gbRd                   0.4-11     2012-10-01 [2]
##  gdata                  2.18.0     2017-06-06 [2]
##  generics               0.0.2      2018-11-29 [2]
##  GenomeInfoDb           1.22.0     2019-10-29 [2]
##  GenomeInfoDbData       1.2.2      2020-01-14 [2]
##  GenomicRanges          1.38.0     2019-10-29 [2]
##  ggplot.multistats      1.0.0      2019-10-28 [1]
##  ggplot2              * 3.3.2      2020-06-19 [1]
##  ggrepel                0.8.1      2019-05-07 [2]
##  ggridges               0.5.2      2020-01-12 [2]
##  ggthemes               4.2.0      2019-05-13 [1]
##  globals                0.12.5     2019-12-07 [2]
##  glue                   1.3.2      2020-03-12 [1]
##  gplots                 3.0.1.2    2020-01-11 [2]
##  gridExtra              2.3        2017-09-09 [2]
##  gtable                 0.3.0      2019-03-25 [2]
##  gtools                 3.8.1      2018-06-26 [2]
##  haven                  2.3.1      2020-06-01 [1]
##  hexbin                 1.28.0     2019-11-11 [2]
##  hms                    0.5.3      2020-01-08 [1]
##  htmltools              0.4.0      2019-10-04 [2]
##  htmlwidgets            1.5.1      2019-10-08 [2]
##  httr                   1.4.2      2020-07-20 [1]
##  ica                    1.0-2      2018-05-24 [2]
##  igraph                 1.2.5      2020-03-19 [1]
##  IRanges                2.20.2     2020-01-13 [2]
##  irlba                  2.3.3      2019-02-05 [2]
##  jsonlite               1.7.1      2020-09-07 [1]
##  KernSmooth             2.23-16    2019-10-15 [3]
##  knitr                  1.26       2019-11-12 [2]
##  knn.covertree          1.0        2019-10-28 [1]
##  labeling               0.3        2014-08-23 [2]
##  laeken                 0.5.1      2020-02-05 [1]
##  lattice                0.20-38    2018-11-04 [3]
##  lazyeval               0.2.2      2019-03-15 [2]
##  leiden                 0.3.1      2019-07-23 [2]
##  lifecycle              0.2.0      2020-03-06 [1]
##  listenv                0.8.0      2019-12-05 [2]
##  lmtest                 0.9-37     2019-04-30 [2]
##  lsei                   1.2-0      2017-10-23 [2]
##  lubridate              1.7.9.2    2020-11-13 [1]
##  magrittr             * 2.0.1      2020-11-17 [1]
##  MASS                   7.3-51.5   2019-12-20 [3]
##  Matrix                 1.2-18     2019-11-27 [3]
##  matrixStats            0.56.0     2020-03-13 [1]
##  memoise                1.1.0      2017-04-21 [2]
##  metap                  1.2        2019-12-08 [2]
##  mnormt                 1.5-5      2016-10-15 [2]
##  modelr                 0.1.8      2020-05-19 [1]
##  multcomp               1.4-12     2020-01-10 [2]
##  multtest               2.42.0     2019-10-29 [2]
##  munsell                0.5.0      2018-06-12 [2]
##  mutoss                 0.1-12     2017-12-04 [2]
##  mvtnorm                1.0-12     2020-01-09 [2]
##  nlme                   3.1-143    2019-12-10 [3]
##  nnet                   7.3-12     2016-02-02 [3]
##  npsurv                 0.4-0      2017-10-14 [2]
##  numDeriv               2016.8-1.1 2019-06-06 [2]
##  openxlsx               4.1.5      2020-05-06 [1]
##  pbapply                1.4-2      2019-08-31 [2]
##  pcaMethods             1.78.0     2019-10-29 [2]
##  pillar                 1.4.6      2020-07-10 [1]
##  pkgbuild               1.0.6      2019-10-09 [2]
##  pkgconfig              2.0.3      2019-09-22 [1]
##  pkgload                1.0.2      2018-10-29 [2]
##  plotly                 4.9.1      2019-11-07 [2]
##  plotrix                3.7-7      2019-12-05 [2]
##  plyr                   1.8.5      2019-12-10 [2]
##  png                    0.1-7      2013-12-03 [2]
##  prettyunits            1.1.1      2020-01-24 [1]
##  processx               3.4.2      2020-02-09 [1]
##  progeny              * 1.11.3     2020-10-22 [1]
##  proxy                  0.4-24     2020-04-25 [1]
##  ps                     1.3.2      2020-02-13 [1]
##  purrr                * 0.3.4      2020-04-17 [1]
##  R.methodsS3            1.7.1      2016-02-16 [2]
##  R.oo                   1.23.0     2019-11-03 [2]
##  R.utils                2.9.2      2019-12-08 [2]
##  R6                     2.4.1      2019-11-12 [1]
##  ranger                 0.12.1     2020-01-10 [1]
##  RANN                   2.6.1      2019-01-08 [2]
##  rappdirs               0.3.1      2016-03-28 [2]
##  RColorBrewer           1.1-2      2014-12-07 [2]
##  Rcpp                   1.0.4      2020-03-17 [1]
##  RcppAnnoy              0.0.16     2020-03-08 [1]
##  RcppEigen              0.3.3.7.0  2019-11-16 [2]
##  RcppHNSW               0.2.0      2019-09-20 [2]
##  RcppParallel           4.4.4      2019-09-27 [2]
##  RCurl                  1.98-1.1   2020-01-19 [1]
##  Rdpack                 0.11-1     2019-12-14 [2]
##  readr                * 1.4.0      2020-10-05 [1]
##  readxl               * 1.3.1      2019-03-13 [2]
##  rematch                1.0.1      2016-04-21 [2]
##  remotes                2.1.0      2019-06-24 [2]
##  reprex                 0.3.0      2019-05-16 [2]
##  reshape2               1.4.3      2017-12-11 [2]
##  reticulate             1.14       2019-12-17 [2]
##  rio                    0.5.16     2018-11-26 [1]
##  rlang                  0.4.8      2020-10-08 [1]
##  rmarkdown              2.0        2019-12-12 [2]
##  robustbase             0.93-6     2020-03-23 [1]
##  ROCR                   1.0-7      2015-03-26 [2]
##  rprojroot              1.3-2      2018-01-03 [2]
##  RSpectra               0.16-0     2019-12-01 [2]
##  rstudioapi             0.11       2020-02-07 [1]
##  rsvd                   1.0.3      2020-02-17 [1]
##  Rtsne                  0.15       2018-11-10 [2]
##  rvest                  0.3.6      2020-07-25 [1]
##  S4Vectors              0.24.2     2020-01-13 [2]
##  sandwich               2.5-1      2019-04-06 [2]
##  scales                 1.1.0      2019-11-18 [2]
##  scatterplot3d          0.3-41     2018-03-14 [1]
##  sctransform            0.2.1      2019-12-17 [2]
##  SDMTools               1.1-221.2  2019-11-30 [2]
##  sessioninfo            1.1.1      2018-11-05 [2]
##  Seurat               * 3.1.2      2019-12-12 [2]
##  SingleCellExperiment   1.8.0      2019-10-29 [2]
##  smoother               1.1        2015-04-16 [1]
##  sn                     1.5-4      2019-05-14 [2]
##  sp                     1.4-2      2020-05-20 [1]
##  stringi                1.5.3      2020-09-09 [1]
##  stringr              * 1.4.0      2019-02-10 [1]
##  SummarizedExperiment   1.16.1     2019-12-19 [2]
##  survival               3.1-8      2019-12-03 [3]
##  testthat               2.3.2      2020-03-02 [1]
##  TFisher                0.2.0      2018-03-21 [2]
##  TH.data                1.0-10     2019-01-21 [2]
##  tibble               * 3.0.4      2020-10-12 [1]
##  tidyr                * 1.1.2      2020-08-27 [1]
##  tidyselect             1.1.0      2020-05-11 [1]
##  tidyverse            * 1.3.0      2019-11-21 [2]
##  tsne                   0.1-3      2016-07-15 [2]
##  TTR                    0.23-6     2019-12-15 [1]
##  usethis                1.5.1      2019-07-04 [2]
##  uwot                   0.1.5      2019-12-04 [2]
##  vcd                    1.4-7      2020-04-02 [1]
##  vctrs                  0.3.5      2020-11-17 [1]
##  VIM                    6.0.0      2020-05-08 [1]
##  viridis              * 0.5.1      2018-03-29 [2]
##  viridisLite          * 0.3.0      2018-02-01 [2]
##  withr                  2.3.0      2020-09-22 [1]
##  xfun                   0.12       2020-01-13 [2]
##  xml2                   1.3.2      2020-04-23 [1]
##  xts                    0.12-0     2020-01-19 [1]
##  XVector                0.26.0     2019-10-29 [2]
##  yaml                   2.2.1      2020-02-01 [1]
##  zip                    2.0.4      2019-09-01 [1]
##  zlibbioc               1.32.0     2019-10-29 [2]
##  zoo                    1.8-7      2020-01-10 [2]
##  source                                 
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.3)                         
##  CRAN (R 3.6.2)                         
##  Bioconductor                           
##  Bioconductor                           
##  Bioconductor                           
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  Github (clauswilke/colorblindr@1ac3d4d)
##  R-Forge (R 3.6.2)                      
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.3)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  Bioconductor                           
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  Bioconductor                           
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.3)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.3)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  Bioconductor                           
##  Bioconductor                           
##  Bioconductor                           
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.3)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  Bioconductor                           
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  Bioconductor                           
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  Bioconductor                           
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.3)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  Github (saezlab/progeny@94bea1c)       
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.3)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.3)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  Bioconductor                           
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  Bioconductor                           
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.3)                         
##  CRAN (R 3.6.3)                         
##  Bioconductor                           
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.3)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  Bioconductor                           
##  CRAN (R 3.6.2)                         
##  CRAN (R 3.6.2)                         
##  Bioconductor                           
##  CRAN (R 3.6.2)                         
## 
## [1] /home/uhlitzf/R/lib
## [2] /usr/local/lib/R/site-library
## [3] /usr/local/lib/R/library